######################
##### PREPARATION ####
######################
# Clear the workspace, keeping only the project's path helper
rm(list=setdiff(ls(), "path_to"))

# Load survey data
s = readRDS(path_to("wide"))
s_robust = readRDS(path_to("robustness_beliefs"))

# Load original coded data: all files in the coded-data folder except those
# whose name contains "_pref_"
files = list.files(path_to("coded_data"))
files = files[!grepl("_pref_", files)]

# Read one coding file and harmonise it:
# - the first column is renamed to "ResponseId" regardless of its header
# - the source file name is stored in the column `file`
# - for files with "conflicts" in their name, only rows with a non-empty
#   ResponseId and an empty coder field are kept and marked as "conflict"
read_coded_file = function(f) {
  d = read.csv2(path_to("coded_data", f), as.is=TRUE)
  colnames(d)[1] = "ResponseId"
  d$file = f
  if (grepl("conflicts", f)) {
    d = d %>% filter(ResponseId != "" & coder == "")
    d$coder = "conflict"
  }
  d
}

# Stack all files; rbind.fill pads columns missing in some files with NA
coded = rbind.fill(lapply(files, read_coded_file))


######################
##### ILLEGAL CODES? #
######################
# Legal codes
legal_codes = c("naive", "dampening", "tiny", "spillover", "other", "misunderstanding", "junk")

# Delete white space
coded$code_effect = gsub(" ", "", coded$code_effect)

# Save original code
coded$old = coded$code_effect

# Correct typos
# Names are the replacements, values are the (regex) patterns to be replaced.
# - a dot used as separator becomes a comma
# - the code "endogenous" becomes "other" (only few cases found)
# - an additional "other" next to "naive" or "tiny" is dropped
# The rules are applied in the order listed.
rnm = c(
  "," = "\\.",
  "other" = "endogenous",
  "naive" = "naive,other",
  "tiny" = "tiny,other"
)
for (i in seq_along(rnm)) {
  coded$code_effect = gsub(rnm[i], names(rnm)[i], coded$code_effect, perl=TRUE)
}

# Delete the code discussion, an ancillary code that applies if respondents
# additionally consider but do not follow alternative arguments.
coded$code_effect = gsub("discussion", "", coded$code_effect, perl=TRUE)

# Removing a code from the middle of a list ("a,discussion,b") leaves a double
# comma; collapse it so that no empty code remains between two real codes.
coded$code_effect = gsub(",{2,}", ",", coded$code_effect, perl=TRUE)

# Remove separators left over at the beginning or the end
del = c("^,", ",$")
for (i in seq_along(del)) {
  coded$code_effect = gsub(del[i], "", coded$code_effect, perl=TRUE)
}

# Check whether any patterns do not match the codebook
codes = unique(unlist(strsplit(coded$code_effect, ",")))
illegal = codes[!codes %in% legal_codes]
illegal

# Return problematic cases
cases = which(vapply(
  strsplit(coded$code_effect, ","),
  function(x) any(illegal %in% x),
  logical(1)
))
coded[cases, c("ResponseId", "file")]

# Check whether renaming was successful
#coded[coded$old != coded$code_effect, c("old", "code_effect")] %>% View

# Bring codes in alphabetical order
# (vapply guarantees a character vector even if there are no rows)
coded$code_effect = vapply(
  strsplit(coded$code_effect, ","),
  function(x) paste0(x[order(x)], collapse=","),
  character(1)
)


######################
##### SIMPLIFY CODES #
#######################
# Derive the simplified coding from the cleaned, alphabetically ordered codes.
# Each rule replaces every match of `pattern` by `replacement`; rules are
# applied one after another in the order listed, so later rules see the
# output of earlier ones.
simplify_rules = list(
  c(pattern = "dampening,tiny|dampening,naive", replacement = "dampening"),
  c(pattern = "naive,tiny",                     replacement = "tiny"),
  c(pattern = "naive,spillover",                replacement = "spillover"),
  c(pattern = "tiny",                           replacement = "dampening"),
  c(pattern = "junk|misunderstanding",          replacement = "other")
)

simple = coded$code_effect
for (rule in simplify_rules) {
  simple = gsub(rule[["pattern"]], rule[["replacement"]], simple, perl=TRUE)
}

# Strip a separator left over at the start or at the end of the code string
for (stray in c("^,", ",$")) {
  simple = gsub(stray, "", simple, perl=TRUE)
}
coded$code_effect_simple = simple

# The backup of the raw codes is no longer needed
coded$old = NULL

# Export original codes for IRR calculation
# (all rows whose file name contains neither "conflict" nor "robust")
is_preconflict = !grepl("conflict|robust", coded$file)
saveRDS(coded[is_preconflict, ], path_to("codes_preconflict"))


###########################
##### CONFLICT MANAGEMENT #
###########################
# Extract originals and conflict solution files, restricted to respondents
# that appear in one of the two survey data sets
survey_ids = c(s$ResponseId, s_robust$ResponseId)
is_solution = grepl("conflict", coded$file)
originals = coded[!is_solution, ] %>% filter(ResponseId %in% survey_ids)
solutions = coded[is_solution, ] %>% filter(ResponseId %in% survey_ids)

# Always two codes for originals, only one solution code
# For every row of `originals`: how many rows share its ResponseId?
# (match/tabulate gives the same counts as comparing every row with every
# other row, but in linear time and without failing on zero rows)
first_row = match(originals$ResponseId, originals$ResponseId)
rows_per_response = tabulate(first_row, nbins = nrow(originals))[first_row]
table(rows_per_response, originals$file) # All 2, except for robustness batches
table(table(solutions$ResponseId)) # All 1

# Number of distinct (cleaned) codes the coders assigned to each response
response_ids = unique(originals$ResponseId)
n_distinct_codes = vapply(response_ids, function(id) {
  length(unique(originals$code_effect[originals$ResponseId %in% id]))
}, integer(1), USE.NAMES = FALSE)

# Coders agree
no_conflict = response_ids[n_distinct_codes == 1]

# Coders disagree
conflict = response_ids[n_distinct_codes > 1]

# Conflict solution for each disagreement available
table(conflict %in% solutions$ResponseId) # All TRUE

# Determine open conflicts
open_conflicts = conflict[!conflict %in% solutions$ResponseId]
length(open_conflicts) == 0 # Should be TRUE
if (length(open_conflicts) > 0) {
  # These responses enter neither step 1 nor step 2 below
  warning(length(open_conflicts),
          " conflict(s) without a solution are dropped from the final data",
          call. = FALSE)
}

# Step 1: Take no_conflict cases from the first coder
# Select from `originals`, not from `coded`: `coded` still contains the
# conflict-solution rows, so a solution row could otherwise be picked as the
# "first" row of a response on which the coders agree.
coded = originals %>% filter(ResponseId %in% no_conflict)
coded = coded[!duplicated(coded$ResponseId), ]

# Step 2: Add solved conflicts
coded = rbind(coded, solutions[solutions$ResponseId %in% conflict, ])

# Manually check whether only legal code combinations are used
table(coded$code_effect_simple)
table(coded$code_effect)


##############################################################
########## SAVE DATA #########################################
##############################################################
# Write the final data (agreed codes plus solved conflicts) to disk
saveRDS(object = coded, file = path_to("coded"))

